clear all 
set maxvar 10000
set more off

* set up the working directory in your computer
cd 

use klowf_analysis, clear 

* --------------------- Sample Selection 
gen out_sample = 0 if wave == 1
replace out_sample = 1 if out_sample == 0 & r_marital != 2  // excluding those who are not currently married
replace out_sample = 2 if out_sample == 0 & r_age > 40 // age-bound

* sample follow-up 
bysort pid_1: egen n_survey = count(wave) if wave < 4
replace out_sample = 3 if out_sample == 0 & n_survey == 1 // excluding those who are not followed up in later survey 

* --------------------- variable specification 
* birth parity ----------------------------
gen b_n0 = b_number == 0 if ~missing(b_number)
gen b_n1 = b_number == 1 if ~missing(b_number)
gen b_n2 = b_number >= 2 if ~missing(b_number)

* change the unit of income / assets 
replace h_income = h_income * 10 
replace h_asset = h_asset * 10 
replace p_income = p_income * 10 
replace r_income = r_income * 10

* ------ measure log wage ratio of husband-wife
gen lnr_inc = ln(r_income+1)
gen lnp_inc = ln(p_income+1)
gen log_rp = lnr_inc - lnp_inc 

gen ln_asset = ln(h_asset+0.01)
gen ln_income = ln(h_income+0.01)

* ------ impute the wage of non-working women 
reg lnr_inc i.r_educ##c.r_age i.r_married c.r_pastjob_d##c.r_pastjob_d if r_work == 1
predict yhat_rinc
replace yhat_rinc = lnr_inc if ~missing(lnr_inc)

gen log_rp_hat = yhat_rinc - lnp_inc 

* -------------- Dependent Variables 
xtset id wave 
gen t2_b_exp = f1.b_experience
gen t3_b_exp = f2.b_experience

egen ever_b = rowtotal(t?_b_exp)
replace ever_b = 1 if ever_b > 1 & ~missing(ever_b)

* ------ measure : dyad embeddedness 
egen e_dyad = rowmean(n_dyad_movie n_dyad_activity)
recode e_dyad (min/2.5=0) (3  /max=1), gen(strong_dyad)

* ----- measure : tradic embeddedness
gen e_triad_w = 0 if ~missing(n_livetogether_wpar) & ~missing(n_livetogether_wpar)
replace e_triad_w = 1 if n_livetogether_wpar == 1 
replace e_triad_w = 1 if n_triad_visit_wpar >= 1 & ~missing(n_triad_visit_wpar)

gen e_triad_h = 0 if ~missing(n_livetogether_hpar) & ~missing(n_livetogether_hpar)
replace e_triad_h = 1 if n_livetogether_hpar == 1
replace e_triad_h = 1 if n_triad_visit_hpar >= 1 & ~missing(n_triad_visit_hpar)

gen strong_triad = e_triad_w 

gen strong = 1 if ~missing(strong_dyad) & ~missing(strong_triad)
replace strong = 0 if strong_dyad == 0 & strong_triad == 0

* missing variable check 
egen n_missing = rowmiss(r_age r_educ p_age p_educ r_work p_work ever_b strong h_income h_asset)
replace n_missing = n_missing + 1 if r_work == 1 & missing(r_income)
replace n_missing = n_missing + 1 if p_work == 1 & missing(p_income)

replace out_sample = 4 if out_sample == 0 & n_missing > 0 

sum r_age r_educ p_age p_educ r_work p_work ever_b strong  /*
*/ h_income h_asset r_income p_income lnr_inc lnp_inc log_rp log_rp_hat if out_sample == 0


* ---------------- descriptive tables across different types of embeddedness 
gen stype = 0 if out_sample == 0 
replace stype = 1 if out_sample == 0 & strong == 0 
replace stype = 2 if out_sample == 0 & strong == 1 

* --------------- Basic Summary Characteristics
gen v_name = ""
gen v_N = .
gen v_mean = .
gen v_sd = .

gen v1_N = .
gen v1_mean = .
gen v1_sd = .

gen v2_N = .
gen v2_mean = .
gen v2_sd = .

local n = 1
foreach X of varlist ever_b r_age r_educ p_age p_educ h_income ln_income h_asset ln_asset b_n0 b_n1 b_n2  r_work p_work r_income lnr_inc p_income lnp_inc log_rp log_rp_hat strong {
	replace v_name = "`X'" in `n'
	* ----------- all 
	qui sum `X' if out_sample == 0
	replace v_N = r(N) in `n'
	replace v_sd = r(sd) in `n' 
	qui mean `X' [pw=wt] if out_sample == 0
	replace v_mean = _b[`X'] in `n'
	
	* ----------- type 1
	qui sum `X' if out_sample == 0 & stype == 1
	replace v1_N = r(N) in `n'
	replace v1_sd = r(sd) in `n' 
	if `r(N)' > 0 {
		qui mean `X' [pw=wt] if out_sample == 0 & stype == 1
		replace v1_mean = _b[`X'] in `n'
	}
	
	* ----------- type 2
	qui sum `X' if out_sample == 0 & stype == 2
	replace v2_N = r(N) in `n'
	replace v2_sd = r(sd) in `n' 
	if `r(N)' > 0 {
		qui mean `X' [pw=wt] if out_sample == 0 & stype == 2
	}
	replace v2_mean = _b[`X'] in `n'
	
	local n = `n'+1
}

* export table 1 
outsheet v* using table1_summary.csv, comma replace, if v_name != ""
drop v*

* export data sets for displaying figure 2
outsheet r_age r_work log_rp log_rp_hat strong b_number ever_b if out_sample == 0 using fertility_descriptive1.csv, replace comma

**-------------------- logistic regression models
label var r_age "R's age"
label var r_educ "R's years of education"
label var p_age "Partner's age"
label var p_educ "Partner's years of education"
label var ln_income "Log(Household Income+0.01)"
label var ln_asset "Log(Household Asset+0.01)"
label var r_work "R is working"
label var p_work "Parter is working"

label var b_n1 "Prior Birth = 1 (ref=0)"
label var b_n2 "Prior Birth >= 2 (ref=0)"

label var ever_b "New Birth"

label var strong "Strong Embededness"
label var log_rp "Log Wage Ratio"
label var log_rp_hat "Log Wage Ratio [imputed]"

* regression table 2
local cv b_n1 b_n2 r_age r_educ p_age p_educ ln_income ln_asset
estimates clear 
qui logit ever_b `cv' log_rp_hat strong  [pw=wt] if out_sample == 0 & r_age <= 40
estimates store hall1 
qui logit ever_b `cv' c.log_rp_hat##c.strong [pw=wt]  if out_sample == 0 & r_age <= 40
estimates store hall2
qui logit ever_b `cv' log_rp_hat [pw=wt] if out_sample == 0 & strong == 0  & r_age <= 40
estimates store hweak 
qui logit ever_b `cv' log_rp_hat [pw=wt] if out_sample == 0 & strong == 1 & r_age <= 40
estimates store hstrong
qui logit ever_b `cv' log_rp strong [pw=wt] if out_sample == 0 & r_age <= 40
estimates store yall1 
qui logit ever_b `cv' c.log_rp##c.strong [pw=wt] if out_sample == 0 & r_age <= 40
estimates store yall2
qui logit ever_b `cv' log_rp [pw=wt] if out_sample == 0 & strong == 0  & r_age <= 40
estimates store yweak 
qui logit ever_b `cv' log_rp  [pw=wt] if out_sample == 0 & strong == 1 & r_age <= 40
estimates store ystrong
esttab * using table2_logit_results.csv, csv replace star(+ 0.1 * 0.05 ** 0.01) nogap r2 label mtitle se b(%9.2f)

* predicted probability for figure 3
local cv b_n1 b_n2 r_age r_educ p_age p_educ ln_income ln_asset
qui logit ever_b `cv' c.log_rp_hat##i.strong [pw=wt] if out_sample == 0 & r_age <= 40
sum log_rp_hat
margins strong, at(c.log_rp_hat = (`r(min)' (0.1) `r(max)'))

matrix a = r(table)'
matrix name = r(at)
svmat a
svmat name 

outsheet name9 if name9 != . using predicted_at.csv, comma replace 
outsheet a1-a9 if a1 !=. using predicted_coef.csv, comma replace 
